
clear
clear all
clear mata
clear matrix
set more off
set matsize 11000
set maxvar 30000
cap log off
capture log close
set emptycells drop
pause on



***********************************************
* USER DEFINE FILEPATH FOR REPLICATION FOLDER *
***********************************************

global path "C:\Users\wb520443\Dropbox\Research Projects\Global pollution\2_analysis\Replication"



***********************************************


global opts		a f plain coll(none) nodep nomti c(b(star fmt(%9.3f)) se(abs par fmt(%9.3f))  ) star(* .10 ** .05 *** .01) noobs nocons

global opts_fs		a f plain coll(none) nodep nomti c(b(star fmt(%9.3f)) se(abs par fmt(%9.3f))) star(* .10 ** .05 *** .01) noobs nocons


global input "$path\data"
global figdat "$path\figdat"
global figures "$path\figures"
global tables "$path\tables"


	foreach t in 30 {
	    
		
	    
		cap mat define countryfigure=J(1100,35,.)
			local row=1
		
		use "$input/rwi_pollution_analysis_cities.dta", replace
			drop if id==6062 & country=="MWI"
			merge 1:1 quadkey latitude longitude rwi error using "$input/ghs_vnres_rwi_table.dta", keep(1 3) nogen
			merge 1:1 quadkey latitude longitude rwi error using "$input/panel_rwi_dem_table.dta", keep(1 3) nogen
				rename std dem_std
		
			append using "$input/delhi_official_cities.dta"
			append using "$input/lagos_official_cities.dta"
			append using "$input/tbilisi_official_cities.dta"
			append using "$input/rio_official_cities.dta"

		replace error=1/error
		g both=population*error
		g both2=country_population*error
		
		g city_code2=city_code
		
		drop if city_code==.
	
		replace city_code=expanded_code
		
		replace city_code=city_code2 if city_code==.

		
		bysort city_code:egen sd_elev=sd(dem)
		bysort city_code:egen max_elev=max(dem)
		bysort city_code:egen min_elev=min(dem)
		g diff_elev=max_elev-min_elev
	
		drop if points_within_city<`t'
		
		
		preserve
			keep city_code diff_elev 
			duplicates drop city_code, force
			sort city_code
			g id=_n
		
			save "$input/temp_elev.dta", replace
		restore
		
		
		levelsof city_code, local(levels) 
		
		egen total_pollution=rowtotal( power industry agri transport other)
		foreach var in  power industry agri transport other{
			g share_`var'=`var'/total_pollution
		}
		
		g region=""
		{
			replace region="SSA" if country=="AGO"
			replace region="LAC" if country=="ARG"
			replace region="ECA" if country=="ARM"
			replace region="SSA" if country=="BEN"
			replace region="SSA" if country=="BFA"
			replace region="SAR" if country=="BGD"
			replace region="ECA" if country=="BIH"
			replace region="LAC" if country=="BOL"
			replace region="LAC" if country=="BRA"
			replace region="SSA" if country=="CIV"
			replace region="SSA" if country=="CMR"
			replace region="SSA" if country=="COD"
			replace region="LAC" if country=="COL"
			replace region="LAC" if country=="CRI"
			replace region="LAC" if country=="DOM"
			replace region="MENA" if country=="DZA"
			replace region="LAC" if country=="ECU"
			replace region="SSA" if country=="ETH"
			replace region="ECA" if country=="GEO"
			replace region="SSA" if country=="GHA"
			replace region="SSA" if country=="GIN"
			replace region="LAC" if country=="GTM"
			replace region="LAC" if country=="HND"
			replace region="LAC" if country=="HTI"
			replace region="EAP" if country=="IDN"
			replace region="SAR" if country=="IND_PAK"
			replace region="LAC" if country=="JAM"
			replace region="MENA" if country=="JOR"
			replace region="ECA" if country=="KAZ"
			replace region="SSA" if country=="KEN"
			replace region="ECA" if country=="KGZ"
			replace region="EAP" if country=="KHM"
			replace region="SSA" if country=="LBR"
			replace region="MENA" if country=="LBY"
			replace region="SAR" if country=="LKA"
			replace region="ECA" if country=="MDA"
			replace region="SSA" if country=="MDG"
			replace region="LAC" if country=="MEX"
			replace region="ECA" if country=="MKD"
			replace region="SSA" if country=="MLI"
			replace region="EAP" if country=="MNG"
			replace region="SSA" if country=="MOZ"
			replace region="EAP" if country=="MYS"
			replace region="SSA" if country=="NGA"
			replace region="LAC" if country=="NIC"
			replace region="SAR" if country=="NPL"
			replace region="LAC" if country=="PER"
			replace region="EAP" if country=="PHL"
			replace region="LAC" if country=="PRY"
			replace region="ECA" if country=="ROU"
			replace region="SSA" if country=="RWA"
			replace region="SSA" if country=="SEN"
			replace region="ECA" if country=="SLV"
			replace region="LAC" if country=="SRB"
			replace region="SSA" if country=="TCD"
			replace region="SSA" if country=="TGO"
			replace region="EAP" if country=="THA"
			replace region="ECA" if country=="TJK"
			replace region="ECA" if country=="TKM"
			replace region="MENA" if country=="TUN"
			replace region="ECA" if country=="TUR"
			replace region="SSA" if country=="TZA"
			replace region="SSA" if country=="UGA"
			replace region="ECA" if country=="UKR"
			replace region="ECA" if country=="UZB"
			replace region="EAP" if country=="VNM"
			replace region="SSA" if country=="ZAF"
			replace region="SSA" if country=="ZMB"
			replace region="SSA" if country=="ZWE"

		}
		
		preserve
			keep city_code country share_* city_code2 region
			
			duplicates drop city_code, force
			save "$input/city_country.dta",replace
		restore
			
		//Mean, mean_ss, and max refer to how PM25 across years and months is collapse (e.g. max is the max pollution level for a RWI point across all years and months in the sample).
		foreach l of local levels {
		
				
			
			cap reghdfe mean_pm25 rwi [pweight=error] if city_code==`l', noabsorb
				cap mat def countryfigure[`row',1]=_b[rwi]
				cap mat def countryfigure[`row',2]=_se[rwi]
			cap reghdfe mean_pm25_ss rwi [pweight=error] if city_code==`l', noabsorb
				cap mat def countryfigure[`row',16]=_b[rwi]
				cap mat def countryfigure[`row',17]=_se[rwi]	
			cap reghdfe rwi dem [pweight=error]if city_code==`l', noabsorb
				cap mat def countryfigure[`row',3]=_b[dem]
				cap mat def countryfigure[`row',4]=_se[dem]
			cap mat def countryfigure[`row',5]=`l'
			sum dem if city_code==`l'
				cap mat def countryfigure[`row',6]=r(mean)
				cap mat def countryfigure[`row',7]=r(sd)
			sum rwi if city_code==`l', d
				cap mat def countryfigure[`row',8]=r(kurtosis)
				cap mat def countryfigure[`row',9]=r(skewness)
				cap mat def countryfigure[`row',13]=r(mean)
				cap mat def countryfigure[`row',14]=r(sd)
			sum mean_pm25 if city_code==`l', d
				cap mat def countryfigure[`row',10]=r(kurtosis)
				cap mat def countryfigure[`row',11]=r(skewness)	
				cap mat def countryfigure[`row',12]=r(max)-r(min)	
				cap mat def countryfigure[`row',15]=r(sd)
			sum mean_pm25_ss if city_code==`l', d
				cap mat def countryfigure[`row',18]=r(kurtosis)
				cap mat def countryfigure[`row',19]=r(skewness)	
				cap mat def countryfigure[`row',20]=r(max)-r(min)	
				cap mat def countryfigure[`row',21]=r(sd)	
				cap mat def countryfigure[`row',22]=r(mean)	
			cap reghdfe ghs_vnres_mean dem if city_code==`l', noabsorb
				cap mat def countryfigure[`row',23]=_b[dem]
				cap mat def countryfigure[`row',24]=_se[dem]	
			cap reghdfe ghs_vnres_mean dem_std if city_code==`l', noabsorb
				cap mat def countryfigure[`row',25]=_b[dem_std]
				cap mat def countryfigure[`row',26]=_se[dem_std]		
			local row=`row'+1
		
		}	
	


		
	clear
				
			svmat2 countryfigure

			drop if countryfigure1==.
			
			g country_code=_n
				run "$input/labels.do"
			
			g HI95_mean=(1.96*countryfigure2)+countryfigure1
			g LI95_mean=countryfigure1-(1.96*countryfigure2)

			g HI99_mean=(2.06*countryfigure2)+countryfigure1
			g LI99_mean=countryfigure1-(2.06*countryfigure2)
			
			
			g HI95_mean_ss=(1.96*countryfigure17)+countryfigure16
			g LI95_mean_ss=countryfigure16-(1.96*countryfigure7)

			g HI99_mean_ss=(2.06*countryfigure17)+countryfigure16
			g LI99_mean_ss=countryfigure16-(2.06*countryfigure17)
			
			foreach var in HI95_mean LI95_mean HI99_mean LI99_mean HI95_mean_ss LI95_mean_ss HI99_mean_ss LI99_mean_ss{
					replace `var'=-10 if `var'<-10
					replace `var'=10 if `var'>10
			}
			
			g tstat=countryfigure3/countryfigure4
			g tstat2=countryfigure1/countryfigure2
			g tstat3=abs(tstat2)
			
			
			
			g sig=(abs(tstat)>2.06&abs(tstat2>2.06))
			
			set scheme plotplain
				
				
			
			rename (countryfigure1 countryfigure2 countryfigure3 countryfigure4 countryfigure5 countryfigure6 countryfigure7 countryfigure8 countryfigure9 countryfigure10 countryfigure11 countryfigure12 countryfigure13 countryfigure14 countryfigure15 countryfigure16 countryfigure17 countryfigure18 countryfigure19 countryfigure20 countryfigure21 countryfigure22) (mean_beta pollution_se mean_elevation_beta elevation_se city_code average_elevation sd_elevation rwi_kurtosis rwi_skewness pm_kurtosis pm_skewness pm_range rwi_mean rwi_sd pm_sd mean_beta_ss pollution_se_ss pm_kurtosis_ss pm_skewness_ss pm_range_ss pm_sd_ss pm_mean_ss)
			
			g tstat2_ss=mean_beta_ss/pollution_se_ss
			g tstat3_ss=abs(tstat2_ss)
			
			g pm_change=mean_beta*rwi_sd
			g pm_range_share=pm_change/pm_range
			g abs_range_share=abs(pm_range_share)
			g abs_beta=abs(mean_beta)
			
			g pm_change_ss=mean_beta_ss*rwi_sd
			g pm_range_share_ss=pm_change_ss/pm_range_ss
			g abs_range_share_ss=abs(pm_range_share_ss)
			g abs_beta_ss=abs(mean_beta_ss)
			
			preserve	
				drop H* L* country*
				export delimited "$input/city_pollution_elevation_coefficients.csv", replace
			restore 
			
			merge 1:1 city_code using "$input/city_country.dta", nogen
			
			g id=_n
			merge 1:1 id using "$input/temp_elev.dta", nogen
			
			g bin=1 if diff_elev<200
			replace bin=2 if  diff_elev>200 & diff_elev<400
			replace bin=3 if  diff_elev>400 & diff_elev<600
			replace bin=4 if  diff_elev>800 & diff_elev<1000
			replace bin=5 if  diff_elev>1000 
			
		
			
			gsort mean_beta
			
			g n=_n
			
			gsort mean_elevation_beta
			
			g n2=_n
			
			gsort  mean_beta_ss
			
			g nss=_n	
				
		egen rowmax=rowmax( share_power share_industry share_agri share_transport share_other)
		foreach var in  share_power share_industry share_agri share_transport share_other{
			g max_`var'=(`var'==rowmax)
		}	
		
		preserve
			drop if mean_beta<-5
			drop if mean_beta_ss<-5
			foreach var in  share_power share_industry share_agri share_transport {
				twoway (rspike HI99_mean LI99_mean n if max_`var'==1 , lc(gs10)) ///
						(scatter mean_beta n if max_`var'==1, mc(cranberry%50)  msize(tiny ) m(S) xla(none,nogrid) ///
						ytitle("City level correlation between" "wealth and total pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))   ysize(3) xsize(6) legend(off) yline(0))
						
						graph export "$figures/city_figure_by_source_`var'_`t'.png", as(png) replace
						
			}
			
			foreach var in  share_power share_industry share_agri share_transport share_other{
				twoway (rspike HI99_mean_ss LI99_mean_ss nss if max_`var'==1 , lc(gs10)) ///
						(scatter mean_beta_ss nss if max_`var'==1, mc(navy%50)  msize(tiny ) m(S) xla(none,nogrid) ///
						ytitle("City level correlation between" "wealth and anthropogenic pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))    ysize(3) xsize(6) legend(off) yline(0))
						
						graph export "$figures/city_figure_by_source_`var'_`t'_ss.png", as(png) replace
						
			}
		restore
		
		preserve
			drop if mean_beta<-5
			drop if mean_beta_ss<-5
			foreach var in  SSA LAC ECA MENA SAR EAP {
				twoway (rspike HI99_mean LI99_mean n if region=="`var'" , lc(gs10)) ///
						(scatter mean_beta n if region=="`var'", mc(cranberry%50)  msize(tiny ) m(S) xla(none,nogrid) ///
						ytitle("City level correlation between" "wealth and total pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))   ysize(3) xsize(6) legend(off) yline(0))
						
						graph export "$figures/city_figure_by_region_`var'_`t'.png", as(png) replace
						
			}
			
			foreach var in  SSA LAC ECA MENA SAR EAP {
				twoway (rspike HI99_mean_ss LI99_mean_ss nss if region=="`var'" , lc(gs10)) ///
						(scatter mean_beta_ss nss if region=="`var'", mc(navy%50)  msize(tiny ) m(S) xla(none,nogrid) ///
						ytitle("City level correlation between" "wealth and anthropogenic pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))    ysize(3) xsize(6) legend(off) yline(0))
						
						graph export "$figures/city_figure_by_region_`var'_`t'_ss.png", as(png) replace
						
			}
		restore
			
		merge m:1 country using "$input/colonization_history.dta"
		
		preserve
		drop if mean_beta<-5
		drop if mean_beta_ss<-5
		foreach var in  british french spanishportuguese other postsoviet equitorial never{
			twoway (rspike HI99_mean LI99_mean n if `var'==1 , lc(gs10)) ///
					(scatter mean_beta n if `var'==1, mc(cranberry%50)  msize(tiny ) m(S) xla(none,nogrid) ///
					ytitle("City level correlation between" "wealth and total pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))    ysize(3) xsize(6) legend(off) yline(0))
					
					graph export "$figures/city_figure_colo_`var'_`t'.png", as(png) replace
					
		}
		
		foreach var in  british french spanishportuguese other postsoviet equitorial never{
			twoway (rspike HI99_mean_ss LI99_mean_ss nss if `var'==1 , lc(gs10)) ///
					(scatter mean_beta_ss nss if `var'==1, mc(navy%50)  msize(tiny ) m(S) xla(none,nogrid) ///
					ytitle("City level correlation between" "wealth and anthropogenic pollution", size(vlarge)) xtitle("Cities ordered from least to most correlated", size(vlarge)) xsc(noline range(0 302)) ylabel(-10(5)10,nogrid labsize(large))   ysize(3) xsize(6) legend(off) yline(0))
					
					graph export "$figures/city_figure_colo_`var'_`t'_ss.png", as(png) replace
					
		}
		restore
		
			label var british "British"
			label var french "French"
			label var spanishportuguese "Spanish/Portuguese"
			label var postsoviet "Post-soviet"
			label var equitorial "Equitorial"
		
			reg mean_beta british french spanishportuguese other
				est sto t1c1
			
			reg mean_beta_ss british french spanishportuguese other
				est sto t1c2
				
			reg mean_beta equitorial
				est sto t1c3
			
			reg mean_beta_ss equitorial
				est sto t1c4	
				
			reg mean_beta postsoviet
				est sto t1c5
			
			reg mean_beta_ss postsoviet
				est sto t1c6
				
		file open  t	using "$tables/table_colonization.tex", replace write
		file write t	"\begin{table}[htbp] \centering" _n "\def\sym#1{\ifmmode^{#1}\else\(^{#1}\)\fi}" _n ///
					"\caption{Colonization history and city correlations}\label{tab:colonization}" _n  ///
					"\begin{tabular*}{1\textwidth}{@{\extracolsep{\fill}}l*{8}{c}}" _n "\midrule" _n ///
					"&Base&\shortstack{Dust and\\SS removed}&Base&\shortstack{Dust and\\SS removed}&Base&\shortstack{Dust and\\SS removed}\\" _n ///
					"\midrule" _n 
		file close t
		
		esttab t1* using "$tables/table_colonization.tex", l keep(british french spanishportuguese other equitorial postsoviet) ///
			s(N, l("N") f(%11.0fc) lay(@)) $opts  
	
		file open  t 	using "$tables/table_colonization.tex", append write
		file write t  "\midrule" _n "\end{tabular*}" _n ///
						"\begin{tabular*}{1\textwidth}{p{6in}}" _n ///
						"\footnotesize \textsc{Notes:} The unit of observation for all regressions is a city in our data. All columns regress the coefficient for the within-city correlation between RWI and average PM\$_{2.5}\$ as calculated with or without dust and sea salt (denoted in column headings) against a series of dummies indicating the colonization history of the country containing the city. In the first and second column the omitted category are countries that were never formally colonized by a European power prior to WWI. Equitorial countries are those between the Tropic of Capricorn and the Tropic of Cancer. Post-soviet countries are those that were formally part of the Soviet Union. \textit{p}-values reported in brackets. (* p$<$.10 ** p$<$.05 *** p$<$.01)." ///
						"\end{tabular*}" _n "\end{table}" _n 
		file close t	
		
	
		
	}
		
		
	
	
	
	
	*erase "$input/temp_elev.dta"
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	
	